# Setup environment ----
library(parallel)
library(data.table)
library(e1071)
RNGkind("L'Ecuyer-CMRG")
source("R/functions.R")
results_path <- "results/replication/"
load(paste0(results_path, "house_analysis_data.RData"))
load(paste0(results_path, "senate_analysis_data.RData"))

# House SVM ----
house_svm_model <- svm(x = hou_X, y = hou_y)
cv_svm_hou <- cv_svm(hou_folds, hou_X, hou_y)
house_svm_data <- data.table(
  dem_spend_adv = hou_X[, "dem_spend_adv"],
  d_dem_spend_adv = ((
    predict(house_svm_model, Xp1H) -
      predict(house_svm_model, Xm1H)) /
      (2 * dxH)),
  incl = hou_X[, "bottom_tail"] + hou_X[, "top_tail"] == 0)
save(house_svm_model, cv_svm_hou, house_svm_data,
  file = paste0(results_path, "house_svm.RData"))

# Senate SVM ----
senate_svm_model <- svm(x = sen_X, y = sen_y)
cv_svm_sen <- cv_svm(sen_folds, sen_X, sen_y)
senate_svm_data <- data.table(
  dem_spend_adv = sen_X[, "dem_spend_adv"],
  d_dem_spend_adv = ((
    predict(senate_svm_model, Xp1S) -
      predict(senate_svm_model, Xm1S)) /
      (2 * dxS)),
  incl = sen_X[, "bottom_tail"] + sen_X[, "top_tail"] == 0)
save(senate_svm_model, cv_svm_sen, senate_svm_data,
  file = paste0(results_path, "senate_svm.RData"))

# Senate SVM counterfactuals ----
# estimate models to nonparametric bootstrapped datasets
set.seed(990264419)
senate_boot_svm_models <- mclapply(1:1000, function(i) {
  cat(format(Sys.time()), i, "\n")
  ok <- sample(nrow(sen_X), nrow(sen_X), replace = TRUE)
  svm(x = sen_X[ok, ], y = sen_y[ok])
}, mc.cores = 6)
save(senate_boot_svm_models,
  file = paste0(results_path, "senate_boot_svm_models.RData"))

# calc marginal effects on bootstrapped datasets,
#   then estimate loess curves of those marginal effects
set.seed(990264419)
senate_boot_svm_cond_ame <- do.call(cbind, mclapply(1:1000, function(i) {
  cat(format(Sys.time()), i, "\n")
  ok <- sample(nrow(sen_X), nrow(sen_X), replace = TRUE)
  xS_boot <- sen_X[ok, 1]
  dxS_boot <- setstep(xS_boot)
  Xp1S_boot <- make_plus_dx_dataset(sen_X[ok, ], dxS_boot, "sen")
  Xm1S_boot <- make_plus_dx_dataset(sen_X[ok, ], -dxS_boot, "sen")
  senate_boot_svm_data <- data.table(
    dem_spend_adv = sen_X[ok, "dem_spend_adv"],
    d_dem_spend_adv = ((
      predict(senate_boot_svm_models[[i]], Xp1S_boot) -
        predict(senate_boot_svm_models[[i]], Xm1S_boot)) /
        (2 * dxS_boot)),
    incl = sen_X[ok, "bottom_tail"] + sen_X[ok, "top_tail"] == 0)
  predict(
    senate_boot_svm_data[incl == TRUE, loess(d_dem_spend_adv ~ dem_spend_adv)],
    sen_X[sen_X[, "bottom_tail"] + sen_X[, "top_tail"] == 0, ])
}, mc.cores = 6))
save(senate_boot_svm_cond_ame,
  file = paste0(results_path, "senate_boot_svm_cond_ame.RData"))

# estimate counterfactuals
senate_data[, total_spending_dem_max :=
    (2 * real_rep_expenditure_w_outside + abs(sen_tail_lims[2])) /
    year_scale]
senate_data[, total_spending_rep_max :=
    (2 * real_dem_expenditure_w_outside + abs(sen_tail_lims[1])) /
    year_scale]
n_boots <- 1000
n_obs <- nrow(sen_X)
possible_values <- c(NA, 0,
  sen_tail_lims[2], sen_tail_lims[1])
senate_svm_counterfactuals <- CJ(boot = 1:n_boots,
  dem_spend_adv = possible_values, index = senate_data[, .I])
senate_svm_counterfactuals[, actual := as.numeric(is.na(dem_spend_adv))]
senate_svm_counterfactuals[is.na(dem_spend_adv),
  dem_spend_adv := sen_X[index, 1]]
year <-
  1980 * sen_X[, "y80"] + 1982 * sen_X[, "y82"] + 1984 * sen_X[, "y84"] +
  1986 * sen_X[, "y86"] + 1988 * sen_X[, "y88"] + 1990 * sen_X[, "y90"] +
  1992 * sen_X[, "y92"] + 1994 * sen_X[, "y94"] + 1996 * sen_X[, "y96"] +
  1998 * sen_X[, "y98"] + 2000 * sen_X[, "y00"] + 2002 * sen_X[, "y02"] +
  2004 * sen_X[, "y04"] + 2006 * sen_X[, "y06"] + 2008 * sen_X[, "y08"] +
  2010 * sen_X[, "y10"] + 2012 * sen_X[, "y12"] + 2014 * sen_X[, "y14"] +
  2016 * sen_X[, "y16"] + 2018 * sen_X[, "y18"]
senate_svm_counterfactuals[, year := year[index]]
# actual
sen_pred_X <- sen_X
pred <- do.call(rbind, mclapply(senate_boot_svm_models, predict,
  newdata = sen_pred_X, mc.cores = 6))
senate_svm_counterfactuals[actual == 1,
  boot_y := pred[cbind(boot,index)]]
# zero spending adv
sen_pred_X <- sen_X
sen_pred_X[, 1] <- 0
sen_pred_X[, 2] <- min(sen_X[, "log_total_spending"])
pred <- do.call(rbind, mclapply(senate_boot_svm_models, predict,
  newdata = sen_pred_X, mc.cores = 6))
senate_svm_counterfactuals[actual == 0 & dem_spend_adv == 0,
  boot_y := pred[cbind(boot,index)]]
# max dem spending
sen_pred_X <- sen_X
sen_pred_X[, 1] <- sen_tail_lims[2]
sen_pred_X[, 2] <- senate_data[, log10(total_spending_dem_max)]
pred <- do.call(rbind, mclapply(senate_boot_svm_models, predict,
  newdata = sen_pred_X, mc.cores = 6))
senate_svm_counterfactuals[actual == 0 &
    dem_spend_adv == sen_tail_lims[2],
  boot_y := pred[cbind(boot,index)]]
# max rep spending
sen_pred_X <- sen_X
sen_pred_X[, 1] <- sen_tail_lims[1]
sen_pred_X[, 2] <- senate_data[, log10(total_spending_rep_max)]
pred <- do.call(rbind, mclapply(senate_boot_svm_models, predict,
  newdata = sen_pred_X, mc.cores = 6))
senate_svm_counterfactuals[actual == 0 &
    dem_spend_adv == sen_tail_lims[1],
  boot_y := pred[cbind(boot,index)]]
save(senate_svm_counterfactuals,
  file = paste0(results_path, "senate_svm_counterfactuals.RData"))

# House SVM counterfactuals ----
# estimate models to nonparametric bootstrapped datasets
set.seed(990264419)
house_boot_svm_models <- mclapply(1:1000, function(i) {
  cat(format(Sys.time()), i, "\n")
  ok <- sample(nrow(hou_X), nrow(hou_X), replace = TRUE)
  svm(x = hou_X[ok, ], y = hou_y[ok])
}, mc.cores = 6)
save(house_boot_svm_models,
  file = paste0(results_path, "house_boot_svm_models.RData"))

# calc marginal effects on bootstrapped datasets,
#   then estimate loess curves of those marginal effects
set.seed(54266132)
house_boot_svm_cond_ame <- do.call(cbind, mclapply(1:1000, function(i) {
  cat(format(Sys.time()), i, "\n")
  ok <- sample(nrow(hou_X), nrow(hou_X), replace = TRUE)
  xH_boot <- hou_X[ok, 1]
  dxH_boot <- setstep(xH_boot)
  Xp1H_boot <- make_plus_dx_dataset(hou_X[ok, ], dxH_boot, "hou")
  Xm1H_boot <- make_plus_dx_dataset(hou_X[ok, ], -dxH_boot, "hou")
  house_boot_svm_data <- data.table(
    dem_spend_adv = hou_X[ok, "dem_spend_adv"],
    d_dem_spend_adv = ((
      predict(house_boot_svm_models[[i]], Xp1H_boot) -
        predict(house_boot_svm_models[[i]], Xm1H_boot)) /
        (2 * dxH_boot)),
    incl = hou_X[ok, "bottom_tail"] + hou_X[ok, "top_tail"] == 0)
  predict(
    house_boot_svm_data[incl == TRUE, loess(d_dem_spend_adv ~ dem_spend_adv)],
    hou_X[hou_X[, "bottom_tail"] + hou_X[, "top_tail"] == 0, ])
}, mc.cores = 6))
save(house_boot_svm_cond_ame,
  file = paste0(results_path, "house_boot_svm_cond_ame.RData"))

# estimate counterfactuals
house_data[, total_spending_dem_max :=
    (2 * real_rep_expenditure_w_outside + abs(hou_tail_lims[2])) /
    year_scale]
house_data[, total_spending_rep_max :=
    (2 * real_dem_expenditure_w_outside + abs(hou_tail_lims[1])) /
    year_scale]
n_boots <- 1000
n_obs <- nrow(hou_X)
possible_values <- c(NA, 0,
  hou_tail_lims[2], hou_tail_lims[1])
house_svm_counterfactuals <- CJ(boot = 1:n_boots,
  dem_spend_adv = possible_values, index = house_data[, .I])
house_svm_counterfactuals[, actual := as.numeric(is.na(dem_spend_adv))]
house_svm_counterfactuals[is.na(dem_spend_adv),
  dem_spend_adv := hou_X[index, 1]]
year <-
  1980 * hou_X[, "y80"] + 1982 * hou_X[, "y82"] + 1984 * hou_X[, "y84"] +
  1986 * hou_X[, "y86"] + 1988 * hou_X[, "y88"] + 1990 * hou_X[, "y90"] +
  1992 * hou_X[, "y92"] + 1994 * hou_X[, "y94"] + 1996 * hou_X[, "y96"] +
  1998 * hou_X[, "y98"] + 2000 * hou_X[, "y00"] + 2002 * hou_X[, "y02"] +
  2004 * hou_X[, "y04"] + 2006 * hou_X[, "y06"] + 2008 * hou_X[, "y08"] +
  2010 * hou_X[, "y10"] + 2012 * hou_X[, "y12"] + 2014 * hou_X[, "y14"] +
  2016 * hou_X[, "y16"] + 2018 * hou_X[, "y18"]
house_svm_counterfactuals[, year := year[index]]
# actual
hou_pred_X <- hou_X
pred <- do.call(rbind, mclapply(house_boot_svm_models, predict,
  newdata = hou_pred_X, mc.cores = 6))
house_svm_counterfactuals[actual == 1,
  boot_y := pred[cbind(boot, index)]]
# zero spending adv
hou_pred_X <- hou_X
hou_pred_X[, 1] <- 0
hou_pred_X[, 2] <- min(hou_X[, "log_total_spending"])
pred <- do.call(rbind, mclapply(house_boot_svm_models, predict,
  newdata = hou_pred_X, mc.cores = 6))
house_svm_counterfactuals[actual == 0 & dem_spend_adv == 0,
  boot_y := pred[cbind(boot,index)]]
# max dem spending
hou_pred_X <- hou_X
hou_pred_X[, 1] <- hou_tail_lims[2]
hou_pred_X[, 2] <- house_data[, log10(total_spending_dem_max)]
pred <- do.call(rbind, mclapply(house_boot_svm_models, predict,
  newdata = hou_pred_X, mc.cores = 6))
house_svm_counterfactuals[actual == 0 &
    dem_spend_adv == hou_tail_lims[2],
  boot_y := pred[cbind(boot,index)]]
# max rep spending
hou_pred_X <- hou_X
hou_pred_X[, 1] <- hou_tail_lims[1]
hou_pred_X[, 2] <- house_data[, log10(total_spending_rep_max)]
pred <- do.call(rbind, mclapply(house_boot_svm_models, predict,
  newdata = hou_pred_X, mc.cores = 6))
house_svm_counterfactuals[actual == 0 &
    dem_spend_adv == hou_tail_lims[1],
  boot_y := pred[cbind(boot,index)]]
save(house_svm_counterfactuals,
  file = paste0(results_path, "house_svm_counterfactuals.RData"))
